Base de dados Utilizada

A princípio, foram utilizados dados disponíveis nos repositórios pertencentes a Wesley Cota (https://github.com/wcota/covid19br) e Rami Krispin (https://github.com/RamiKrispin/covid19sf). Estes pacotes fornecem uma base de dados sobre casos de covid no Brasil e no mundo.

Requisitos

Serão necessários os seguintes pacotes:

library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.6     v dplyr   1.0.7
## v tidyr   1.1.4     v stringr 1.4.0
## v readr   2.0.2     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(geobr)
## Loading required namespace: sf
library(tmap)
## Registered S3 methods overwritten by 'stars':
##   method             from
##   st_bbox.SpatRaster sf  
##   st_crs.SpatRaster  sf

Eles serão necessários para ler, obter, tratar e juntar bases de dados tabulares e dados georreferenciados

Realizando o download dos dados

# Print the current working directory
getwd()
## [1] "C:/Users/Fellipe/Desktop/git/fellipe.mira.github.io"
# The project folder below is specific to one machine. Only switch to it when
# it actually exists, so the document still runs elsewhere instead of stopping
# with "cannot change working directory".
projeto_dir <- "C:/Users/Fellipe/Desktop/git/fellipe.mira.github.io"
if (dir.exists(projeto_dir)) {
  setwd(projeto_dir)
}

# Address of the per-state cases CSV in the wcota/covid19br repository
url <- "https://raw.githubusercontent.com/wcota/covid19br/master/cases-brazil-states.csv"

# Download the CSV and parse it into a tibble
dados <- url %>%
  readr::read_csv()
## Rows: 18494 Columns: 26
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr   (3): country, state, city
## dbl  (22): epi_week, newDeaths, deaths, newCases, totalCases, deathsMS, tota...
## date  (1): date
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.

Analisando as estruturas dos dados

Analisando as estruturas dos dados disponíveis no GitHub

# Compact overview of every column: name, type and first values
dados %>% glimpse()
## Rows: 18,494
## Columns: 26
## $ epi_week                              <dbl> 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10~
## $ date                                  <date> 2020-02-25, 2020-02-25, 2020-02~
## $ country                               <chr> "Brazil", "Brazil", "Brazil", "B~
## $ state                                 <chr> "SP", "TOTAL", "SP", "TOTAL", "S~
## $ city                                  <chr> "TOTAL", "TOTAL", "TOTAL", "TOTA~
## $ newDeaths                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ deaths                                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ newCases                              <dbl> 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0,~
## $ totalCases                            <dbl> 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,~
## $ deathsMS                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ totalCasesMS                          <dbl> 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2,~
## $ deaths_per_100k_inhabitants           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ totalCases_per_100k_inhabitants       <dbl> 0.00218, 0.00047, 0.00218, 0.000~
## $ deaths_by_totalCases                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ recovered                             <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ suspects                              <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ tests                                 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ tests_per_100k_inhabitants            <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ vaccinated                            <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ vaccinated_per_100_inhabitants        <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ vaccinated_second                     <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ vaccinated_second_per_100_inhabitants <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ vaccinated_single                     <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ vaccinated_single_per_100_inhabitants <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ vaccinated_third                      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ vaccinated_third_per_100_inhabitants  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~

Como nosso objetivo é trabalhar com os estados, devemos transformar a variável “state” em factor.

# Store the state abbreviation as a factor
dados[["state"]] <- as.factor(dados[["state"]])

Preparando os dados:

Selecionaremos as colunas state e deaths e agruparemos por estado, resumindo o número de mortes em um total por estado.

# Total deaths per state.
# `deaths` looks like a running total per state and date (its sibling
# `totalCases` keeps growing while `newCases` holds the daily count), so adding
# it up across dates counts every death once per day on record. That is why the
# earlier sum(deaths) version reported tens of millions of deaths for SP.
# Take the value recorded at the most recent date of each state instead.
# NOTE: the table printed right after this chunk came from the sum(deaths)
# version; re-knit the document to refresh it.
a <- dados %>%
  select(state, date, deaths) %>%
  group_by(state) %>%
  summarise(total_estado = deaths[which.max(date)])
state total_estado
AC 694774
AL 2196551
AM 5398133
AP 750333
BA 8928256
CE 9008901
DF 3635919
ES 4465196
GO 7238361
MA 3742990
MG 16083919
MS 2820579
MT 4529184
PA 6394536
PB 3283874
PE 7575340
PI 2484523
PR 11107877
RJ 22755645
RN 2654757
RO 2180167
RR 721558
RS 10795835
SC 5971961
SE 2159415
SP 48493877
TO 1232323
TOTAL 197304784

Dados georreferenciados

Até agora trabalhamos apenas com dados tabulares, mas, com o auxílio do pacote geobr, faremos o download de dados vetoriais dos estados brasileiros.

# Fetch the 2019 state boundaries through geobr
states <- geobr::read_state(year = 2019)
## Using year 2019
## Loading data for the whole country
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |===                                                                   |   4%
  |                                                                            
  |=====                                                                 |   7%
  |                                                                            
  |========                                                              |  11%
  |                                                                            
  |==========                                                            |  15%
  |                                                                            
  |=============                                                         |  19%
  |                                                                            
  |================                                                      |  22%
  |                                                                            
  |==================                                                    |  26%
  |                                                                            
  |=====================                                                 |  30%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |==========================                                            |  37%
  |                                                                            
  |=============================                                         |  41%
  |                                                                            
  |===============================                                       |  44%
  |                                                                            
  |==================================                                    |  48%
  |                                                                            
  |====================================                                  |  52%
  |                                                                            
  |=======================================                               |  56%
  |                                                                            
  |=========================================                             |  59%
  |                                                                            
  |============================================                          |  63%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |=================================================                     |  70%
  |                                                                            
  |====================================================                  |  74%
  |                                                                            
  |======================================================                |  78%
  |                                                                            
  |=========================================================             |  81%
  |                                                                            
  |============================================================          |  85%
  |                                                                            
  |==============================================================        |  89%
  |                                                                            
  |=================================================================     |  93%
  |                                                                            
  |===================================================================   |  96%
  |                                                                            
  |======================================================================| 100%
# Inspect the columns of the boundaries table, including the geometry column
states %>% glimpse()
## Rows: 27
## Columns: 6
## $ code_state   <dbl> 11, 12, 13, 14, 15, 16, 17, 21, 22, 23, 24, 25, 26, 27, 2~
## $ abbrev_state <chr> "RO", "AC", "AM", "RR", "PA", "AP", "TO", "MA", "PI", "CE~
## $ name_state   <chr> "Rondônia", "Acre", "Amazônas", "Roraima", "Pará", "Amapá~
## $ code_region  <dbl> 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, ~
## $ name_region  <chr> "Norte", "Norte", "Norte", "Norte", "Norte", "Norte", "No~
## $ geom         <MULTIPOLYGON [°]> MULTIPOLYGON (((-65.3815 -1..., MULTIPOLYGON~

Conhecendo os dados, podemos realizar o join das duas bases de dados.

# Attach the per-state totals to the boundaries. A right join keeps every row
# of `states`, matching `state` against the two-letter `abbrev_state`.
dados_com_geom <- a %>%
  right_join(states, by = c("state" = "abbrev_state"))

Agora iremos avaliar como nossa base de dados está estruturada

# Check the structure of the joined table
glimpse(dados_com_geom)
## Rows: 27
## Columns: 7
## $ state        <chr> "AC", "AL", "AM", "AP", "BA", "CE", "DF", "ES", "GO", "MA~
## $ total_estado <dbl> 694774, 2196551, 5398133, 750333, 8928256, 9008901, 36359~
## $ code_state   <dbl> 12, 27, 13, 16, 29, 23, 53, 32, 52, 21, 31, 50, 51, 15, 2~
## $ name_state   <chr> "Acre", "Alagoas", "Amazônas", "Amapá", "Bahia", "Ceará",~
## $ code_region  <dbl> 1, 2, 1, 1, 2, 2, 5, 3, 5, 2, 3, 5, 5, 1, 2, 2, 2, 4, 3, ~
## $ name_region  <chr> "Norte", "Nordeste", "Norte", "Norte", "Nordeste", "Norde~
## $ geom         <MULTIPOLYGON [°]> MULTIPOLYGON (((-71.07772 -..., MULTIPOLYGON~
# Preview the first rows of the joined table
dados_com_geom %>% head()
## # A tibble: 6 x 7
##   state total_estado code_state name_state code_region name_region
##   <chr>        <dbl>      <dbl> <chr>            <dbl> <chr>      
## 1 AC          694774         12 Acre                 1 Norte      
## 2 AL         2196551         27 Alagoas              2 Nordeste   
## 3 AM         5398133         13 Amazônas             1 Norte      
## 4 AP          750333         16 Amapá                1 Norte      
## 5 BA         8928256         29 Bahia                2 Nordeste   
## 6 CE         9008901         23 Ceará                2 Nordeste   
## # ... with 1 more variable: geom <MULTIPOLYGON [°]>

Plotando os dados com o tmap package

Primeiramente, iremos transformar os dados que foram unidos para a classe sf (simple feature), a qual é útil para transformar dados tabulares em dados “spatio-temporal” (por este motivo as funções começam com st_).

# Convert the joined table to a plain data.frame, then promote it to an sf
# object so the mapping functions can use it
dados_com_geom <- dados_com_geom %>%
  as.data.frame() %>%
  sf::st_as_sf()

# Switch tmap to its interactive viewing mode
tmap_mode("view")
## tmap mode set to interactive viewing
# Choropleth of total_estado on a blue palette, with gray state outlines
tm_shape(dados_com_geom) +
  tm_fill("total_estado", palette = "Blues") +
  tm_shape(dados_com_geom) +
  tm_borders(col = "gray")